 ///
 /// @file    rss.cc
 /// @author  lemon(haohb13@gmail.com)
 /// @date    2023-05-02 11:36:42
 ///
 
#include "tinyxml2.h"
#include <regex>
#include <vector>
#include <fstream>
#include <iostream>

using std::cout;
using std::endl;
using std::vector;
using std::string;
using std::ofstream;

using namespace tinyxml2;

/// One article extracted from the RSS feed.
///
/// Plain aggregate; the member order is part of the interface because it
/// determines aggregate-initialization order.
struct RssItem {
    /// Text of the item's <title> element.
    string title;

    /// Text of the item's <link> element.
    string link;

    /// Text of the item's <description> element, stored by
    /// RssReader::parseRss after markup tags have been stripped.
    string description;

    /// Text of the item's <content:encoded> element, stored by
    /// RssReader::parseRss after markup tags have been stripped.
    string content;
};

/// Reads the items of an RSS feed into memory and writes them back out as a
/// flat sequence of <doc> records.
class RssReader {
public:
    /// Reserves room for 100 items up front so the first hundred appends do
    /// not reallocate the backing vector.
    RssReader()
    {
        _rss.reserve(100);
    }

    /// Parses the feed file and appends every item found to the reader.
    void parseRss();

    /// Writes all stored items to the file named by @p filename.
    void dump(const string & filename);

private:
    /// Items collected so far, in feed order.
    vector<RssItem> _rss;
};
 
void RssReader::parseRss() 
{
	XMLDocument doc;
	doc.LoadFile("coolshell.xml");
	if(doc.ErrorID()) {
		return;
	}

	XMLElement * root = doc.FirstChildElement("rss");
	//先获取第一篇文章的信息
	XMLElement * itemElem = root->FirstChildElement("channel")->FirstChildElement("item");

	std::regex re("<.*?>");
	do {
		XMLElement * titleElem = itemElem->FirstChildElement("title");
		const char * title = titleElem->GetText();
		//cout << "title:" << title << endl;

		XMLElement * linkElem = itemElem->FirstChildElement("link");
		const char * link = linkElem->GetText();
		//cout << "link:" << link << endl;

		XMLElement * descriptElem = itemElem->FirstChildElement("description");
		const char * descript = descriptElem->GetText();
		//cout << "description:" << descript << endl;
		string origin1(descript);

		XMLElement * contentElem = itemElem->FirstChildElement("content:encoded");
		const char * content = contentElem->GetText();

		string origin2(content);
		RssItem item;
		item.title = title;
		item.link = link;
		item.description = std::regex_replace(origin1, re, string(""));
		item.content = std::regex_replace(origin2, re, string(""));
		cout << "description:" << item.description << endl;
		cout << "content:" << item.content << endl;
		_rss.push_back(item);
	} while((itemElem = itemElem->NextSiblingElement("item")) != nullptr);
} 

/// Writes every stored item to @p filename, one record per item, in the form
///
///     <doc>
///     <docid>N</docid>
///     <title>...</title>
///     <link>...</link>
///     <description>...</description>
///     <content>...</content>
///     </doc>
///
/// where N numbers the records starting from 1. Field text is written exactly
/// as stored; no escaping is applied.
///
/// @param filename  Path of the output file; it is opened with ofstream's
///                  default mode.
///
/// If the file cannot be opened, a message is printed to stdout and nothing
/// is written.
void RssReader::dump(const string &filename)
{
	ofstream ofs(filename);
	if(!ofs) {
		cout << "ofstream open file " << filename << " error!\n";
		return;
	}

	long docId = 1;
	for(const auto & item : _rss) {
		// The link element is closed with </link>; the previous code emitted
		// a second opening <link> tag here, leaving every record malformed.
		ofs << "<doc>\n"
			<< "<docid>" << docId++ << "</docid>\n"
			<< "<title>" << item.title << "</title>\n"
			<< "<link>" << item.link << "</link>\n"
			<< "<description>" << item.description << "</description>\n"
			<< "<content>" << item.content << "</content>\n"
			<< "</doc>\n";
	}
	// ofs is flushed and closed by its destructor when it goes out of scope.
}
 
int main(void)
{
	RssReader rssReader;
	rssReader.parseRss();
	rssReader.dump("pagelib.txt");
	return 0;
}
